*** 
*** Code for "The Labor Market Integration of Refugee Migrants in High-Income Countries"
*** Courtney Brell, Christian Dustmann, and Ian Preston
***
*** Analysis of the UK Survey of New Refugees
*** This file should be run in the folder containing the SNR data
***

********************************************************************************
*** Preliminaries

* Start from an empty session
clear all

* Import data
* The path is written with a forward slash, which Stata accepts on Windows,
* macOS and Unix alike; a backslash is only a directory separator on Windows
* and is also the macro-escape character in Stata.
use "stata8/survey_of_new_refugees", clear

* Demographic variables
* Country of origin: copy of q9_origin, displayed with the value label q9_origin
gen countryoforigin = q9_origin
label values countryoforigin q9_origin
* Female indicator: 1 when q1_gender is 2, 0 when it is 1, missing for any other code
gen female = (q1_gender == 2) if inlist(q1_gender, 1, 2)
* Age category is taken as-is from q2_age_group
gen age_cat = q2_age_group

* Years spent in the UK before the initial wave: each q8_time_uk category (1-4) is
* replaced by a point value (the midpoint of the category); other codes become missing
recode q8_time_uk (1 = 0.25) (2 = 0.75) (3 = 1.5) (4 = 3.5) (else = .), gen(prioryearsinuk)

* Years since arrival at each wave
* Waves 0, 1, 2 and 3 take place 0, 8, 15 and 21 months after the decision, respectively
local w = 0
foreach monthsafter of numlist 0 8 15 21 {
	gen yearssincearrive`w' = prioryearsinuk + `monthsafter'/12
	local ++w
}

* Every observation in this survey is a refugee, so the migrant type is the constant 1
label define Lmigrant 0 "Native" 1 "Refugee" 2 "Other immigrant"
gen migranttype = 1
label values migranttype Lmigrant

* Sample selection: only age categories below 5 are retained
* (missing age_cat is removed as well, since missing compares greater than any number)
drop if age_cat >= 5
* Survey weights, one per wave: the base weight for wave 0, the follow-up weights for waves 1-3
gen indweight0 = basewght
forvalues w = 1/3 {
	gen indweight`w' = f`w'_weight
}

********************************************************************************
*** Calculate employment outcomes

* Employment indicator for each follow-up wave:
*   1 for economic-activity codes 1-3, 0 for the other listed codes, missing otherwise
* NOTE(review): wave 1 also maps code 11 to 0 while waves 2 and 3 do not - confirm against the codebook
gen employment1 = inlist(f1q19_ecomonic_activity, 1, 2, 3) if inlist(f1q19_ecomonic_activity, 1, 2, 3, 4, 5, 6, 7, 8, 11)
gen employment2 = inlist(f2q19_econ_activity, 1, 2, 3) if inlist(f2q19_econ_activity, 1, 2, 3, 4, 5, 6, 7, 8)
gen employment3 = inlist(f3q24_econ_activity, 1, 2, 3) if inlist(f3q24_econ_activity, 1, 2, 3, 4, 5, 6, 7, 8)

* Reorganize data into long form: one row per respondent (ref) and wave
* Only employment1-employment3 are generated in this file, so wave 0 has no employment value
reshape long employment indweight yearssincearrive, i(ref) j(wave)
* Truncate years since arrival to whole (completed) years
replace yearssincearrive = int(yearssincearrive)

* Indicator for a nonmissing employment observation; summed unweighted (rawsum) below
gen Nemp = !missing(employment)

* Weighted employment rates by gender
* The by-gender result is staged in a temporary file so that the output file UK-SNR
* is written exactly once, in its complete form, rather than saved and then overwritten
tempfile bygender
preserve
collapse (mean) employment (rawsum) Nemp [aw=indweight], by(female yearssincearrive migranttype)
drop if missing(female)
save "`bygender'"
restore

* Pooled employment rates (female is missing in these rows), combined with the by-gender rows
preserve
collapse (mean) employment (rawsum) Nemp [aw=indweight], by(yearssincearrive migranttype)
append using "`bygender'"
order yearssincearrive migranttype female employment Nemp
sort migranttype female yearssincearrive
save "UK-SNR", replace
restore

********************************************************************************
*** Calculate sample descriptives

* Restrict to weighted follow-up observations (waves other than 0) with migrant type 1
drop if missing(indweight)
keep if wave != 0
drop if migranttype != 1

* Number of observations
count
* Number of unique individuals: persTag flags one row per ref
egen persTag = tag(ref)
tabulate persTag

* Gender (weighted share female)
summarize female [aw=indweight]
* Age: each age category is mapped to a representative age in years before summarizing
recode age_cat (1 = 21) (2 = 29.5) (3 = 39.5) (4 = 54.5), gen(age_cat_numbered)
summarize age_cat_numbered [aw=indweight], detail

* Country of origin: weighted share of each origin country, listing the ten largest shares
* The data in memory are preserved and restored, so nothing created here persists
preserve
	* The constant to be summed lives in a temporary variable. The previous
	* capture-gen of a variable called dummy would silently reuse any existing
	* variable of that name and sum its values instead of a constant.
	tempvar one
	gen `one' = 1
	collapse (sum) numrefugees=`one' [iw=indweight], by(countryoforigin)
	* total() is the documented egen function for a column total (sum() is its old, undocumented name)
	egen totalrefugees = total(numrefugees)
	gen fracrefugees = numrefugees/totalrefugees
	* Sort by descending share and show the first ten rows
	gsort -fracrefugees
	list countryoforigin fracrefugees if _n <= 10
restore

* Labor market outcomes: number of nonmissing employment observations and weighted summary
count if employment < .
summarize employment [aw=indweight], detail

********************************************************************************
*** Clean up

* Leave an empty session behind
clear all
